Input for run_select_pattern.sh: RSEM result (GeneMat_HFD_Lingon_LDF.Ebseqresults/GeneMat_HFD_Lingon_LDF.Ebseqresults_FDR_0.05.tab)
Output: Gene list with EnsemblID and GeneSymbol, classified as diffent patterns (Output folder: Matrix/pattern)
#seperate genes based on different patterns
scr/run_select_pattern.sh
#full list of expressed genes
cut -f 1 Matrix/GeneMat_HFD_Lingon_LDF.Ebseqresults | sed $'s/_/\t/' | sed $'s/"//g'|tail -n +2 > Matrix/full_gene_list.csv
pattern2.FDR0.05 <- read.table("Matrix/pattern/pattern2_FDR0.05.csv", header = TRUE)
pattern3.FDR0.05 <- read.table("Matrix/pattern/pattern3_FDR0.05.csv", header = TRUE)
pattern4.FDR0.05 <- read.table("Matrix/pattern/pattern4_FDR0.05.csv", header = TRUE)
full_gene <- read.table("Matrix/full_gene_list.csv", header = FALSE)
names(full_gene) <- c("EnsemblID","GeneSymbol")
#load packages
library(clusterProfiler)
library(org.Mm.eg.db)
library(ggplot2)
library(gprofiler2)
Summarize the number of genes classified to each pattern (FDR cutoff = 0.05)
pattern_table <- read.table("Matrix/GeneMat_HFD_Lingon_LDF.Ebseqresults.pattern",header = TRUE)
names(pattern_table) <- c("HDF","Lingon","LFD")
RSEM_summary <-data.frame(c(0,155,40,123,0,318),
row.names = c("pattern1.FDR0.05","pattern2.FDR0.05","pattern3.FDR0.05","pattern4.FDR0.05","pattern5.FDR0.05","Sum"))
names(RSEM_summary) <- c("number of genes")
knitr::kable(pattern_table)
| HDF | Lingon | LFD | |
|---|---|---|---|
| Pattern1 | 1 | 1 | 1 |
| Pattern2 | 1 | 1 | 2 |
| Pattern3 | 1 | 2 | 1 |
| Pattern4 | 1 | 2 | 2 |
| Pattern5 | 1 | 2 | 3 |
knitr::kable(RSEM_summary)
| number of genes | |
|---|---|
| pattern1.FDR0.05 | 0 |
| pattern2.FDR0.05 | 155 |
| pattern3.FDR0.05 | 40 |
| pattern4.FDR0.05 | 123 |
| pattern5.FDR0.05 | 0 |
| Sum | 318 |
For example:
Genes belonging to Pattern 1 equals no differential change between any of the experiments
Genes belonging to Pattern 2 are differentially expressed in LFD compared to HFD & Lingon
Genes in Pattern 3 are DE in Lingon compared to the other experiments
Genes in Pattern 4 have similar expression in Lingon and LFD but are differentially expressed in HFD
(clusterProfiler v3.16.1 For help: https://guangchuangyu.github.io/software/clusterProfiler)
ego<- function(pattern_data){
ego_pattern <- enrichGO(gene = pattern_data[,1],
universe = full_gene[,1],
OrgDb = org.Mm.eg.db,
keyType = "ENSEMBL",
ont = 'ALL',
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
str1 <- "GO_results/clusterProfiler/"
str2 <- "_GO_enrich.csv"
file_name <- paste(str1, deparse(substitute(pattern_data)), str2, sep = "")
write.csv(as.data.frame(ego_pattern),file_name,row.names = F)
return(ego_pattern)
}
ego_pattern2.FDR0.05 <- ego(pattern2.FDR0.05)
barplot(ego_pattern2.FDR0.05,showCategory=20,drop=T)
dotplot(ego_pattern2.FDR0.05,showCategory=20)
heatplot(ego_pattern2.FDR0.05)
ego_pattern3.FDR0.05 <- ego(pattern3.FDR0.05)
barplot(ego_pattern3.FDR0.05,showCategory=20,drop=T)
dotplot(ego_pattern3.FDR0.05,showCategory=20)
heatplot(ego_pattern3.FDR0.05)
ego_pattern4.FDR0.05 <- ego(pattern4.FDR0.05)
barplot(ego_pattern4.FDR0.05,showCategory=20,drop=T)
dotplot(ego_pattern4.FDR0.05,showCategory=20)
heatplot(ego_pattern4.FDR0.05)
(gprofiler2 version 0.2.0 For help:https://cran.r-project.org/web/packages/gprofiler2/vignettes/gprofiler2.html)
gost_pattern2 <- gost(pattern2.FDR0.05[,1], organism ='mmusculus', sources = 'GO', significant = TRUE)
gostplot(gost_pattern2)
gost_pattern3 <- gost(pattern3.FDR0.05[,1], organism ='mmusculus', sources = 'GO', significant = TRUE)
gostplot(gost_pattern3)
gost_pattern4 <- gost(pattern4.FDR0.05[,1], organism ='mmusculus', sources = 'GO', significant = TRUE)
gostplot(gost_pattern4)
clusterProfiler GO analysis output
clusterProfiler GO analysis output files are in the ./GO_results/clusterProfiler
gprofiler GO analysis output - pattern2.FDR0.05
gprofiler GO analysis output - pattern3.FDR0.05
gprofiler GO analysis output - pattern4.FDR0.05
gprofiler GO analysis output files are in the ./GO_results/gprofiler